package com.qunar.crawler.utils;

import java.text.SimpleDateFormat;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.select.Selector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Parses a crawled HTML page with jsoup and prints the fields of every list entry found under
 * {@code div#listWrapper>ul>li} to standard output.
 */
public class JsoupPossesserWeather {

    // Matches a <meta ... charset="..."> tag. Not referenced by any code in this class.
    private static final Pattern compile = Pattern.compile("<meta.*?charset=(\")?(.*?)\".*?>");

    // Not referenced by any code in this class. SimpleDateFormat is not thread-safe, so do not
    // start sharing this instance across threads without synchronization.
    private static final SimpleDateFormat format = new SimpleDateFormat("yyyy");

    private static final Logger logger = LoggerFactory.getLogger(JsoupPossesserWeather.class);

    /**
     * Parses {@code html} and, for each {@code li} directly under {@code div#listWrapper>ul},
     * prints the user name, timestamp, message content, entry id, nickname and the two
     * coordinate components to standard output, followed by a separator line.
     *
     * <p>Missing elements or attributes yield empty strings. A {@code null} {@code html} is
     * treated as "nothing to parse" and the method returns without output.
     *
     * @param html  the raw page markup; may be {@code null}
     * @param infos currently unused: no bean is built or added here (the bean-filling code that
     *              used to follow the loop was disabled and has been removed)
     */
    public static void digInfo(StringBuilder html, List<Bread> infos) {
        if (html == null) {
            return;
        }

        Document doc = Jsoup.parse(html.toString());

        // Iterating an empty selection is a no-op, so no explicit size check is needed.
        for (Element entry : doc.select("div#listWrapper>ul>li")) {
            // Selectors run on the element itself; serializing it and re-parsing is unnecessary.
            String userName = entry.select("div.userName").attr("rel"); // user name

            String timeTamp = entry.select("div.pubInfo.c_tx5").select("a.time").attr("rel"); // timestamp

            String msgCnt = entry.select("div.msgCnt").text(); // weibo (microblog) content

            String weibo_id = entry.attr("id"); // id attribute of the <li> itself

            String nickName = entry.select("div.userName>strong>a").text(); // nickname

            // geo location: both coordinates in one comma-separated attribute
            String areaInfo = entry.select("div.areaInfo").select("a").attr("pos");

            String[] coordinates = splitCoordinates(areaInfo);

            System.out.println(userName);
            System.out.println(timeTamp);
            System.out.println(msgCnt);
            System.out.println(weibo_id);
            System.out.println(nickName);
            System.out.println(coordinates[0]);
            System.out.println(coordinates[1]);
            System.out.println("=============================");
        }
    }

    /**
     * Splits a comma-separated coordinate string into its first two components.
     *
     * <p>NOTE(review): the original code stored the first component in a variable named
     * {@code lat} but commented it as longitude (and the second as {@code lon} / latitude).
     * The actual order of the {@code pos} attribute should be confirmed against the page.
     *
     * @param areaInfo the raw {@code pos} attribute value; may be {@code null} or empty
     * @return a two-element array {first, second}; a component that is absent is returned as
     *         the empty string instead of causing an {@link ArrayIndexOutOfBoundsException}
     */
    private static String[] splitCoordinates(String areaInfo) {
        String first = "";
        String second = "";
        if (areaInfo != null && !areaInfo.isEmpty()) {
            // String.split drops trailing empty strings, so "1.0," yields one part and ","
            // yields none; guard every index.
            String[] parts = areaInfo.split(",");
            if (parts.length > 0) {
                first = parts[0];
            }
            if (parts.length > 1) {
                second = parts[1];
            }
        }
        return new String[] {first, second};
    }

}
